*************************************************************************************
*** Table A12. Comparison of Different Estimators Based on Simulation ****
*************************************************************************************
* ---------------------------------------------------------------------------
* Build the benchmark panel used by the simulations further down:
* load the payment panel, drop outlier customers, construct pre-treatment
* covariates, winsorize them, and save the ITT sample.
* ---------------------------------------------------------------------------
set more off
use "$filepath/data/Non_payment_dataset.dta", clear
xtset customer t


** Outlier removal: customers with any negative payment or an extreme payment **
bysort customer: egen minpayment = min(payment)   // 5 from ITT sample
drop if minpayment < 0

bysort customer: egen maxpayment = max(payment)
* Display the 99.9th per-mille cut point of the per-customer maximum (ITT sample only)
_pctile maxpayment if itt != ., nq(1000)
di r(r999)

* The cutoff below is typed in by hand rather than read from r(r999).
* A missing maxpayment also satisfies ">" in Stata, so such customers are dropped too.
tab itt t if payment > 18561.6 & payment != .   // 12 from ITT sample
drop if maxpayment > 18561.6   // removes customers with top 0.1% largest payments



* 1 when a strictly positive payment is recorded, 0 otherwise, missing when payment is missing
gen paidcount = (payment > 0) if payment != .

* Pre-treatment is before August (t=12). Started calling end of September (t=13) people received first messages in October t=14
* Each statistic is computed over t<=12 only, so it is missing on later rows until it is spread below
bysort customer: egen premeaninvoice    = mean(invoices)  if t <= 12
bysort customer: egen premeanpayment    = mean(payment)   if t <= 12
bysort customer: egen prepaidcounttotal = sum(paidcount)  if t <= 12
bysort customer: egen presuminvoice     = sum(invoices)   if t <= 12
bysort customer: egen presumpayment     = sum(payment)    if t <= 12
gen presumpaymentratio = presumpayment/presuminvoice if t <= 12

* Closing balance as of the last pre-treatment period
gen preclosingbalance = closingbalance if t == 12

* Copy each pre-treatment value onto every row of the customer
* (preage_of_account is not created in this file; it must already exist in the loaded data)
foreach v in premeaninvoice premeanpayment preclosingbalance preage_of_account prepaidcounttotal presuminvoice presumpayment presumpaymentratio {
    tempvar spread
    bysort customer: egen `spread' = mean(`v')
    replace `v' = `spread'
    drop `spread'
}

* Winsorized copies (suffix w) of the continuous controls, using winsor with p(0.01)
foreach v in premeaninvoice preage_of_account preclosingbalance premeanpayment presumpaymentratio {
    winsor `v', gen(`v'w) p(0.01)
}

* Keep the ITT sample and only the variables needed downstream
keep if itt != .
keep customer t pre* payment scheme* interview* date*

save "$filepath/data/benchmark_data.dta", replace



* Assignment helper: one row per (schemedummy, customer) observed with a
* non-missing payment at t==14, plus an empty treatment column that the
* simulation loops fill in.

use "$filepath/data/benchmark_data.dta", clear
keep if t==14 & payment!=.   // t==14 cross-section without attrited customers
gen z=1
collapse z, by(schemedummy customer)
gen treatment = .
sort customer
save "$filepath/temp/treatmentassignment.dta", replace

* Results container: zero observations, a single variable draw.
* Each simulation draw appends one row to this file.
clear
gen draw=.
save "$filepath/temp/results", replace 


*** Two-part model wrapper, called directly and through the bootstrap prefix ***
* Works on the data in memory: restricts to t==14, fits twopm (probit first part,
* log-linear regress second part), then posts the margins dydx(treatment) result
* with predict(duan) so that _b[treatment] is available to the caller.
capture program drop Ey_boot_log
program Ey_boot_log, eclass
    * Destructive: observations with t other than 14 (including missing t) are removed
    keep if t==14
    twopm payment treatment schemedummynew*, firstpart(probit) secondpart(regress, log) vce(cluster customer)
    * nose: no standard errors are computed here
    margins, dydx(treatment) predict(duan) nose post
end

** Pool of positive payment amounts, used later to simulate treatment effects **
* Takes the positive t==14 payments, shuffles them with a uniform draw,
* makes four copies of every row and numbers the rows 1.._N as customer,
* which is the key the simulation loop merges on.
set seed 551

use "$filepath/data/benchmark_data.dta", clear
keep if t==14 & payment>0 & payment!=.
rename payment randompayment
keep randompayment

* Random ordering of the pool (consumes the seed set above)
quietly generate random = runiform()
sort random

* expand appends the copies after the sorted originals; customer is the row number
expand 4
gen customer = _n
quietly save "$filepath/temp/randompayment", replace



*** POWER SIMULATION ***
* Runs 1000 draws. Each draw:
*   1. assigns a placebo treatment within schemedummy strata (one third coded 0,
*      one third coded 1, the rest left missing and therefore unused by the regressions),
*   2. injects an artificial effect into treated t==14 payments
*      (+5 percentage points of payers, +3.5 percent on positive amounts),
*   3. runs every estimator and stores coefficient, standard error and p-value,
*   4. collapses to one row and appends it to results.dta in $filepath/temp.
* Statement order matters: sample and runiform consume the random-number stream.
forvalues i = 1/1000 {

    * Draw-specific seed: the literal 551 followed by the digits of i
    set seed 551`i'

    * Treatment Assignment *
    * Group A: one third of each stratum, coded treatment = 0
    quietly use "$filepath/temp/treatmentassignment.dta", clear
    quietly sample 33.333333333333333333, by(schemedummy)
    quietly replace treatment = 0 
    cd "$filepath/temp"
    quietly save assignmentA`i'.dta, replace 

    * Group B: half of the customers not in group A, coded treatment = 1
    quietly use treatmentassignment.dta, clear
    quietly merge m:1 customer using assignmentA`i', keepusing(treatment) replace update nogen 
    quietly drop if treatment==0
    quietly sample 50, by(schemedummy)
    quietly replace treatment = 1 
    quietly save assignmentB`i'.dta, replace 

    * Attach both assignments to the full panel, then remove the scratch files
    quietly use "$filepath/data/benchmark_data.dta" , clear
    quietly merge m:1 customer using assignmentA`i', keepusing(treatment) replace update nogen 
    quietly merge m:1 customer using assignmentB`i', keepusing(treatment) replace update nogen 

    quietly erase assignmentA`i'.dta
    quietly erase assignmentB`i'.dta

    quietly gen draw=`i'
    quietly drop if payment==. // attrited customer


    *** Simulate treatment Effect ***

    * Random rank among treated non-payers at t==14; every other row gets a missing value
    quietly generate random = runiform() if payment==0 & treatment==1 & t==14
    * randompayment is keyed on customer = row number of the pool file.
    * NOTE(review): this presumes benchmark customer ids lie within that range;
    * an unmatched customer would receive a missing randompayment - confirm against the data.
    * Pool rows without a match are added with missing t and are removed by the t==14 filter below.
    quietly merge m:1 customer using "$filepath/temp/randompayment", keepusing(randompayment) replace update nogen 

    quietly gen payer=0 
    quietly replace payer=1 if payment>0

    * M is the 2x2 count matrix of treatment (rows) by payer (columns) at t==14;
    * row 2 is treatment==1, column 1 is payer==0
    quietly tab treatment payer if t==14, matcell(M)
    quietly di M[2,1]*((M[2,2]/(M[2,2]+M[2,1])+0.05)*(M[2,2]+M[2,1])-M[2,2])/M[2,1] // This estimates the number of non-paying customers in the treatment group, that need to be treated in order to have a 5 percentage point effect on the extensive margin

    * Rows with a non-missing random value sort first, so n ranks the treated non-payers.
    * The count expression is kept exactly as above so that rounding at the n<= boundary is unchanged.
    quietly sort random
    quietly gen n=_n
    quietly replace payment=randompayment if treatment==1 & t==14 & payment==0 & n<=M[2,1]*((M[2,2]/(M[2,2]+M[2,1])+0.05)*(M[2,2]+M[2,1])-M[2,2])/M[2,1] // this assigns a randomly chosen positive value from the set of positive payment values
    quietly replace payment=payment*1.035 if treatment==1 & t==14 & payment>0  // 3.5 % effect on intensive margin

    * need to define vars here, because otherwise we would use unmodified value
    quietly gen lnpayment=ln(payment) 
    quietly gen asinhpayment=asinh(payment) 
    quietly gen ln1payment=ln(payment+1) 
    quietly winsor payment if t==14, gen(paymentw) p(0.01)
    quietly gen paymentyesno=0 
    quietly replace paymentyesno=1 if payment>0 & payment!=.


    * Treatment indicator is switched on only from t==14; earlier periods are coded 0
    quietly replace treatment=0 if treatment==1 & t<14
    quietly xtset customer t


    /* Pure DD ESTIMATES: customer fixed effects and period dummies, periods up to 14 */
    quietly xtreg payment treatment i.t if t<=14, fe vce(cluster customer)
    quietly gen DDbeta=_b[treatment]
    quietly gen DDse=_se[treatment]
    quietly gen DDpvalue = (2 * ttail(e(df_r), abs(DDbeta/DDse)))


    * A plain // comment is used here: a triple slash would join the next line to this command
    quietly keep if t==14 // Panel data no longer needed

    /* OLS ESTIMATES, no covariates (also stores the constant) */
    quietly reg payment treatment if t==14, rob
    quietly gen OLSpurebeta=_b[treatment]
    quietly gen OLSpureconstant=_b[_cons]
    quietly gen OLSpurese=_se[treatment]
    quietly gen OLSpurepvalue = (2 * ttail(e(df_r), abs(OLSpurebeta/OLSpurese)))

    /* OLS ESTIMATES Simple: scheme dummies only */
    quietly reg payment treatment schemedummynew* if t==14, rob
    quietly gen OLSsimplebeta=_b[treatment]
    quietly gen OLSsimplese=_se[treatment]
    quietly gen OLSsimplepvalue = (2 * ttail(e(df_r), abs(OLSsimplebeta/OLSsimplese)))

    /* OLS ESTIMATES + Controls */
    quietly reg payment treatment premeaninvoicew presumpaymentratiow preage_of_accountw preclosingbalancew prepaidcounttotal interviewerdummy* datedummy* schemedummynew* if t==14, rob
    quietly gen OLScontrolsbeta=_b[treatment]
    quietly gen OLScontrolsse=_se[treatment]
    quietly gen OLScontrolspvalue = (2 * ttail(e(df_r), abs(OLScontrolsbeta/OLScontrolsse)))

    ** Get winsorized mean value of control group *
    quietly reg paymentw treatment if t==14, rob
    quietly gen OLSpaymentwconstant=_b[_cons]

    /* Winsorized ESTIMATES */
    quietly reg paymentw treatment schemedummynew* if t==14, rob
    quietly gen OLSwinsorbeta=_b[treatment]
    quietly gen OLSwinsorse=_se[treatment]
    quietly gen OLSwinsorpvalue = (2 * ttail(e(df_r), abs(OLSwinsorbeta/OLSwinsorse)))

    /* IHS OLS ESTIMATES */
    quietly reg asinhpayment treatment schemedummynew* if t==14, rob
    quietly gen OLSasinhbeta=_b[treatment]
    quietly gen OLSasinhse=_se[treatment]
    quietly gen OLSasinhpvalue = (2 * ttail(e(df_r), abs(OLSasinhbeta/OLSasinhse)))

    /* Log OLS ESTIMATES: outcome is ln(payment+1) */
    quietly reg ln1payment treatment schemedummynew* if t==14, rob
    quietly gen OLSlnbeta=_b[treatment]
    quietly gen OLSlnse=_se[treatment]
    quietly gen OLSlnpvalue = (2 * ttail(e(df_r), abs(OLSlnbeta/OLSlnse)))

    /* Cube root ESTIMATES */
    quietly gen paymentcuberoot=payment^(1/3)
    quietly reg paymentcuberoot treatment schemedummynew* if t==14, rob
    quietly gen OLScuberootbeta=_b[treatment]
    quietly gen OLScuberootse=_se[treatment]
    quietly gen OLScuberootpvalue = (2 * ttail(e(df_r), abs(OLScuberootbeta/OLScuberootse)))

    * Extensive Margin: probit marginal effect, p-value from the normal distribution *
    quietly probit paymentyesno treatment schemedummynew* if t==14 , rob
    quietly margins, dydx(treatment) post
    quietly gen EMbeta=_b[treatment]
    quietly gen EMse=_se[treatment]
    quietly gen EMpvalue = 2*(1-normal(abs(EMbeta/EMse)))

    * Intensive Margin: ln(payment) is missing for zero payments, so only payers enter *
    quietly reg lnpayment treatment schemedummynew* if t==14 , rob
    quietly gen IMbeta=_b[treatment]
    quietly gen IMse=_se[treatment]
    quietly gen IMpvalue = (2 * ttail(e(df_r), abs(IMbeta/IMse)))

    * TWO PM ESTIMATES - log
    * Point estimate from one call on the full sample; the standard error is read
    * after the bootstrap prefix has rerun the same program (output is left visible)
    quietly Ey_boot_log
    quietly gen TWOPMbeta=_b[treatment]
    bootstrap _b, seed(511)  reps(1000): Ey_boot_log
    quietly gen TWOPMse=_se[treatment]
    quietly gen TWOPMpvalue = 2*(1-normal(abs(TWOPMbeta/TWOPMse)))


    * Reduce to a single row for this draw (estimate columns are constant within the draw)
    quietly keep OLS* DD*  IM* EM*  TWO* draw paymentw
    quietly collapse OLS* DD* IM* EM* TWO* paymentw, by(draw)
    cd "$filepath/temp/"
    quietly save values`i'.dta,replace

    * Append the row to the running results file; clear makes the load independent
    * of whether the data in memory are flagged as changed
    quietly use results.dta, clear
    quietly append using values`i'.dta
    quietly sort draw
    quietly save results.dta, replace
    quietly erase values`i'.dta

    * Progress indicator
    di `i'

}

* Summarize the power simulation: one row per draw in results.dta
* (read from the current working directory, which the loop above set to $filepath/temp)
use results, clear

log using "$filepath/output_logs/table A12 - beta std power", replace

** Mean and std. dev. across draws of the simulated treatment effects
summarize OLSpureconstant OLSpurebeta EMbeta IMbeta TWOPMbeta OLSsimplebeta OLScontrolsbeta DDbeta OLSasinhbeta OLSlnbeta OLSwinsorbeta OLScuberootbeta

* Rejection indicator at the 5 percent level for every estimator.
* The list holds variable-name abbreviations on purpose: they resolve to the
* p-value columns and also determine the names of the generated indicators.
foreach pv in OLSpurep EMp IMp TWOPMpvalue OLSsimplep OLScontrolsp DDp OLSasinhp OLSlnpvalue OLSwinsorpvalue OLScuberootpvalue {
    gen power`pv' = (`pv' < 0.05)
}

** Power: share of draws in which the injected effect is detected
summarize power*


log close








********************* TYPE 1 ERROR ****************


* Results container for the no-effect runs: zero observations, a single variable draw *
clear
gen draw = .
save "$filepath/temp/results_noeffect.dta", replace 


*** Two-part model wrapper (same definition as used for the power simulation) ***
* Restricts the data in memory to t==14, fits twopm with a probit first part and a
* log-linear regress second part, then posts margins dydx(treatment) with predict(duan).
capture program drop Ey_boot_log
program Ey_boot_log, eclass
    * Destructive: observations with t other than 14 (including missing t) are removed
    keep if t==14
    twopm payment treatment schemedummynew*, firstpart(probit) secondpart(regress, log) vce(cluster customer)
    * nose: no standard errors are computed here
    margins, dydx(treatment) predict(duan) nose post
end


*** TYPE 1 ERROR SIMULATION ***
* Runs 1000 draws with a placebo treatment and NO injected effect. Each draw:
*   1. assigns a placebo treatment within schemedummy strata (one third coded 0,
*      one third coded 1, the rest left missing and therefore unused by the regressions),
*   2. runs every estimator and stores coefficient, standard error and p-value,
*   3. collapses to one row and appends it to results_noeffect.dta in $filepath/temp.
* Statement order matters: sample consumes the random-number stream.
forvalues i = 1/1000 {

    * Draw-specific seed: the literal 551 followed by the digits of i
    set seed 551`i'

    * Treatment Assignment *
    * Group A: one third of each stratum, coded treatment = 0
    quietly use "$filepath/temp/treatmentassignment.dta", clear
    quietly sample 33.333333333333333333, by(schemedummy)
    quietly replace treatment = 0 
    cd "$filepath/temp"
    quietly save assignmentA`i'.dta, replace 

    * Group B: half of the customers not in group A, coded treatment = 1
    quietly use treatmentassignment.dta, clear
    quietly merge m:1 customer using assignmentA`i', keepusing(treatment) replace update nogen 
    quietly drop if treatment==0
    quietly sample 50, by(schemedummy)
    quietly replace treatment = 1 
    quietly save assignmentB`i'.dta, replace 

    * Attach both assignments to the full panel, then remove the scratch files
    quietly use "$filepath/data/benchmark_data.dta" , clear
    quietly merge m:1 customer using assignmentA`i', keepusing(treatment) replace update nogen 
    quietly merge m:1 customer using assignmentB`i', keepusing(treatment) replace update nogen 

    quietly erase assignmentA`i'.dta
    quietly erase assignmentB`i'.dta

    quietly gen draw=`i'
    quietly drop if payment==. // attrited customer


    * Outcome transformations (payments are left unmodified in this section)
    quietly gen lnpayment=ln(payment) 
    quietly gen asinhpayment=asinh(payment) 
    quietly gen ln1payment=ln(payment+1) 
    quietly winsor payment if t==14, gen(paymentw) p(0.01)
    quietly gen paymentyesno=0 
    quietly replace paymentyesno=1 if payment>0 & payment!=.


    * Treatment indicator is switched on only from t==14; earlier periods are coded 0
    quietly replace treatment=0 if treatment==1 & t<14
    quietly xtset customer t


    /* Pure DD ESTIMATES: customer fixed effects and period dummies, periods up to 14 */
    quietly xtreg payment treatment i.t if t<=14, fe vce(cluster customer)
    quietly gen DDbeta=_b[treatment]
    quietly gen DDse=_se[treatment]
    quietly gen DDpvalue = (2 * ttail(e(df_r), abs(DDbeta/DDse)))


    * A plain // comment is used here: a triple slash would join the next line to this command
    quietly keep if t==14 // Panel data no longer needed

    /* OLS ESTIMATES, no covariates (also stores the constant) */
    quietly reg payment treatment if t==14, rob
    quietly gen OLSpurebeta=_b[treatment]
    quietly gen OLSpureconstant=_b[_cons]
    quietly gen OLSpurese=_se[treatment]
    quietly gen OLSpurepvalue = (2 * ttail(e(df_r), abs(OLSpurebeta/OLSpurese)))

    /* OLS ESTIMATES Simple: scheme dummies only */
    quietly reg payment treatment schemedummynew* if t==14, rob
    quietly gen OLSsimplebeta=_b[treatment]
    quietly gen OLSsimplese=_se[treatment]
    quietly gen OLSsimplepvalue = (2 * ttail(e(df_r), abs(OLSsimplebeta/OLSsimplese)))

    /* OLS ESTIMATES + Controls */
    quietly reg payment treatment premeaninvoicew presumpaymentratiow preage_of_accountw preclosingbalancew prepaidcounttotal interviewerdummy* datedummy* schemedummynew* if t==14, rob
    quietly gen OLScontrolsbeta=_b[treatment]
    quietly gen OLScontrolsse=_se[treatment]
    quietly gen OLScontrolspvalue = (2 * ttail(e(df_r), abs(OLScontrolsbeta/OLScontrolsse)))

    ** Get winsorized mean value of control group *
    quietly reg paymentw treatment if t==14, rob
    quietly gen OLSpaymentwconstant=_b[_cons]

    /* Winsorized ESTIMATES */
    quietly reg paymentw treatment schemedummynew* if t==14, rob
    quietly gen OLSwinsorbeta=_b[treatment]
    quietly gen OLSwinsorse=_se[treatment]
    quietly gen OLSwinsorpvalue = (2 * ttail(e(df_r), abs(OLSwinsorbeta/OLSwinsorse)))

    /* IHS OLS ESTIMATES */
    quietly reg asinhpayment treatment schemedummynew* if t==14, rob
    quietly gen OLSasinhbeta=_b[treatment]
    quietly gen OLSasinhse=_se[treatment]
    quietly gen OLSasinhpvalue = (2 * ttail(e(df_r), abs(OLSasinhbeta/OLSasinhse)))

    /* Log OLS ESTIMATES: outcome is ln(payment+1) */
    quietly reg ln1payment treatment schemedummynew* if t==14, rob
    quietly gen OLSlnbeta=_b[treatment]
    quietly gen OLSlnse=_se[treatment]
    quietly gen OLSlnpvalue = (2 * ttail(e(df_r), abs(OLSlnbeta/OLSlnse)))

    /* Cube root ESTIMATES */
    quietly gen paymentcuberoot=payment^(1/3)
    quietly reg paymentcuberoot treatment schemedummynew* if t==14, rob
    quietly gen OLScuberootbeta=_b[treatment]
    quietly gen OLScuberootse=_se[treatment]
    quietly gen OLScuberootpvalue = (2 * ttail(e(df_r), abs(OLScuberootbeta/OLScuberootse)))

    * Extensive Margin: probit marginal effect, p-value from the normal distribution *
    quietly probit paymentyesno treatment schemedummynew* if t==14 , rob
    quietly margins, dydx(treatment) post
    quietly gen EMbeta=_b[treatment]
    quietly gen EMse=_se[treatment]
    quietly gen EMpvalue = 2*(1-normal(abs(EMbeta/EMse)))

    * Intensive Margin: ln(payment) is missing for zero payments, so only payers enter *
    quietly reg lnpayment treatment schemedummynew* if t==14 , rob
    quietly gen IMbeta=_b[treatment]
    quietly gen IMse=_se[treatment]
    quietly gen IMpvalue = (2 * ttail(e(df_r), abs(IMbeta/IMse)))

    * TWO PM ESTIMATES - log
    * Point estimate from one call on the full sample; the standard error is read
    * after the bootstrap prefix has rerun the same program (output is left visible)
    quietly Ey_boot_log
    quietly gen TWOPMbeta=_b[treatment]
    bootstrap _b, seed(511)  reps(1000): Ey_boot_log
    quietly gen TWOPMse=_se[treatment]
    quietly gen TWOPMpvalue = 2*(1-normal(abs(TWOPMbeta/TWOPMse)))


    * Reduce to a single row for this draw (estimate columns are constant within the draw)
    quietly keep OLS* DD*  IM* EM*  TWO* draw paymentw
    quietly collapse OLS* DD* IM* EM* TWO* paymentw, by(draw)
    * Directory is set before the files that rely on it, as in the power simulation loop
    cd "$filepath/temp/"
    quietly save values`i'.dta,replace

    * Append the row to the running results file; clear makes the load independent
    * of whether the data in memory are flagged as changed
    quietly use results_noeffect.dta, clear
    quietly append using values`i'.dta
    quietly sort draw
    quietly save results_noeffect.dta, replace
    quietly erase values`i'.dta

    * Progress indicator
    di `i'

}

* Summarize the no-effect simulation: one row per draw in results_noeffect.dta
* (read from the current working directory, which the loop above set to $filepath/temp)
use results_noeffect, clear

** Mean and std. dev. across draws of the placebo estimates (average effect size should be zero)
** This summary runs before the log is opened, so it is not written to the log file
summarize OLSpureconstant OLSpaymentwconstant OLSpurebeta OLSsimplebeta OLScontrolsbeta OLSwinsorbeta OLSlnbeta OLSasinhbeta OLScuberootbeta EMbeta IMbeta TWOPMbeta DDbeta


log using "$filepath/output_logs/table A12 - type 1 error", replace

** Type 1 error rate: share of draws rejecting at the 5 percent level
* The list holds variable-name abbreviations on purpose: they resolve to the
* p-value columns and also determine the names of the generated indicators.
foreach pv in OLSpurep OLSsimplep OLScontrolsp OLSwinsorpvalue OLSlnpvalue OLSasinhp OLScuberootpvalue TWOPMpvalue EMp IMp DDp {
    gen t1err`pv' = (`pv' < 0.05)
}

summarize t1err*

log close

